/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.pagedb;
import java.io.*;
import java.util.Arrays;
import net.nutch.io.*;
import net.nutch.db.*;
public class FetchListEntry implements Writable, Cloneable {
public static final String DIR_NAME = "fetchlist";
private final static byte CUR_VERSION = 2;
private boolean fetch;
private Page page;
private String[] anchors;
public FetchListEntry() {}
public FetchListEntry(boolean fetch, Page page, String[] anchors) {
this.fetch = fetch;
this.page = page;
this.anchors = anchors;
}
//
// Writable
//
public void readFields(DataInput in) throws IOException {
byte version = in.readByte(); // read version
if (version > CUR_VERSION) // check version
throw new VersionMismatchException(CUR_VERSION, version);
fetch = in.readByte() != 0; // read fetch flag
page = Page.read(in); // read page
if (version > 1) { // anchors added in version 2
anchors = new String[in.readInt()]; // read anchors
for (int i = 0; i < anchors.length; i++) {
anchors[i] = UTF8.readString(in);
}
} else {
anchors = new String[0];
}
}
public static FetchListEntry read(DataInput in) throws IOException {
FetchListEntry result = new FetchListEntry();
result.readFields(in);
return result;
}
public void write(DataOutput out) throws IOException {
out.writeByte(CUR_VERSION); // store current version
out.writeByte((byte)(fetch ? 1 : 0)); // write fetch flag
page.write(out); // write page
out.writeInt(anchors.length); // write anchors
for (int i = 0; i < anchors.length; i++) {
UTF8.writeString(out, anchors[i]);
}
}
//
// Accessor methods
//
public boolean getFetch() { return fetch; }
public Page getPage() { return page; }
public String[] getAnchors() { return anchors; }
public boolean equals(Object o) {
if (!(o instanceof FetchListEntry))
return false;
FetchListEntry other = (FetchListEntry)o;
return
this.fetch == other.fetch &&
this.page.equals(other.page) &&
Arrays.equals(this.anchors, other.anchors);
}
public Object clone() {
try {
FetchListEntry clone = (FetchListEntry)super.clone();
clone.page = (Page)clone.page.clone();
clone.anchors = new String[this.anchors.length];
System.arraycopy(this.anchors, 0, clone.anchors, 0, this.anchors.length);
return clone;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
}
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append("version: " + CUR_VERSION + "\n");
buffer.append("fetch: " + fetch + "\n");
buffer.append("page: " + page + "\n");
buffer.append("anchors: " + anchors.length + "\n" );
for (int i = 0; i < anchors.length; i++) {
buffer.append(" anchor: " + anchors[i] + "\n");
}
return buffer.toString();
}
public static void main(String argv[]) throws Exception {
String usage = "FetchListEntry [ -recno N | -dumpurls ] segmentDir";
if (argv.length < 1) {
System.out.println("Usage: " + usage);
System.exit(-1);
}
int recno = -1;
String segment = null;
boolean dumpUrls = false;
for (int i = 0; i < argv.length; i++) {
if ("-dumpurls".equals(argv[i])) {
dumpUrls = true;
} else if ("-recno".equals(argv[i])) {
recno = Integer.parseInt(argv[++i]);
} else {
segment = argv[i];
}
}
FetchListEntry fle = new FetchListEntry();
ArrayFile.Reader fetchlist =
new ArrayFile.Reader(new File(segment, FetchListEntry.DIR_NAME).getPath());
if (dumpUrls) {
int count = 0;
while (fetchlist.next(fle) != null) {
System.out.println("Recno " + count + ": " + fle.getPage().getURL());
count++;
}
}
if (recno != -1) {
fetchlist.get(recno, fle);
System.out.println(fle);
}
fetchlist.close();
}
}